//@+leo-ver=5-thin
//@+node:caminhante.20210809161411.1: * @file SourceCodeToTokenize.hpp
//@@language cplusplus
#pragma once
using namespace std;
//@+others
//@+node:caminhante.20210809161759.1: ** /includes
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <limits>
#include <numeric>
#include <sstream>
#include <streambuf>
#include <string>
#include <utility>
#include <vector>
//@+node:caminhante.20210809161826.1: ** just_pass
// Returns a predicate that accepts every integer it is given.
// Handy as a "no filtering" default wherever a function<bool(int)> is expected.
// The callable is built once and copied out on each call.
inline std::function<bool(int)> just_pass () {
  static const std::function<bool(int)> accept_all = [](int) -> bool { return true; };
  return accept_all;
}
//@+node:caminhante.20210809161836.1: ** SourceCodeToTokenize
// Cursor over an in-memory copy of a source text, with helpers that pull
// simple lexemes (whitespace runs, tokens, literals, numbers, quoted strings)
// off the front while tracking the current line and column.
//
// The whole text is loaded eagerly into a string and scanned by index, so
// end-of-input detection and position queries never depend on stream state.
class SourceCodeToTokenize {
//@+others
//@+node:caminhante.20210809161902.1: *3* private
private:
  // Index of the first character of the line the cursor is on, and the
  // 1-based number of that line.
  std::size_t beginning_last_line=0, current_line=1;
  // Snapshot taken by save_reading_state() for restore_reading_state().
  std::size_t saved_last_line=0, saved_line=1, saved_position=0;
  // Complete source text and the index of the next unread character.
  std::string source;
  std::size_t cursor=0;
//@+node:caminhante.20210809161914.1: *3* public
public:
  // Tokenize the given text.
  SourceCodeToTokenize (std::string i) : source(std::move(i)) {}
  // Tokenize everything that can still be read from the stream `i`.
  // Streams are not copyable, so the stream is taken by reference.
  SourceCodeToTokenize (std::istream &i)
    : source(std::istreambuf_iterator<char>(i), std::istreambuf_iterator<char>()) {}
  //@+others
  //@+node:caminhante.20210809161947.1: *4* Read tokens
  //@+node:caminhante.20210809162104.1: *5* next
  // Consumes one character and returns it as a value in 0..255,
  // or -1 when the input is exhausted.
  int next () {
    if (reached_end()) return -1;
    const char c = source[cursor++];
    // The cursor already points past `c`, which is what new_line() expects.
    stats_for_next_char(c);
    return static_cast<unsigned char>(c);
  }
  //@+node:caminhante.20210809162111.1: *5* peek
  // Returns the next character (0..255) without consuming it, or -1 at the end.
  int peek () const {
    if (reached_end()) return -1;
    return static_cast<unsigned char>(source[cursor]);
  }
  //@+node:caminhante.20210809162116.1: *5* whitespace
  // If the next character is whitespace, consumes and returns everything up
  // to (not including) the first graphic character; otherwise returns "".
  std::string whitespace () {
    if (reached_end() || !std::isspace(peek())) return "";
    return extract_until_or_preserve_state([](char c) -> bool {
      // Cast first: the <cctype> classifiers are undefined for negative values.
      return std::isgraph(static_cast<unsigned char>(c)) != 0;
    });
  }
  //@+node:caminhante.20210809162122.1: *5* next_token
  // If the next character is graphic, consumes and returns everything up to
  // (not including) the first whitespace character; otherwise returns "".
  std::string next_token () {
    if (reached_end() || !std::isgraph(peek())) return "";
    return extract_until_or_preserve_state([](char c) -> bool {
      return std::isspace(static_cast<unsigned char>(c)) != 0;
    });
  }
  //@+node:caminhante.20210809162129.1: *5* read_literal
  // Consumes `literal` if the unread input starts with exactly that text and
  // returns true. On any mismatch (or an empty literal) nothing is consumed
  // and false is returned.
  bool read_literal (const std::string &literal) {
    if (reached_end()) return false;
    std::size_t matched = 0;
    // Stop on the first mismatching character, or right after the last
    // character of the literal has been matched.
    const std::function<bool(char)> stop = [&](char c) -> bool {
      if (matched == literal.length()) return true;
      if (c != literal[matched]) return true;
      ++matched;
      return false;
    };
    // Only a run covering the whole literal counts as a match.
    const std::function<bool(std::size_t)> whole_literal = [&](std::size_t len) -> bool {
      return len == literal.length();
    };
    return !extract_until_or_preserve_state(stop, whole_literal).empty();
  }
  //@+node:caminhante.20210809162135.1: *5* read_number
  // Consumes and returns the run of decimal digits at the cursor,
  // or "" when the next character is not a digit.
  std::string read_number () {
    if (reached_end() || !std::isdigit(peek())) return "";
    return extract_until_or_preserve_state([](char c) -> bool {
      return std::isdigit(static_cast<unsigned char>(c)) == 0;
    });
  }
  //@+node:caminhante.20210809162140.1: *5* read_string
  // Consumes a string delimited by a pair of " or a pair of ' and returns it
  // including both quotes. A backslash escapes the following character, so
  // \" does not close the string while a quote after \\ does. Returns "" if
  // the next character is not a quote. If the closing quote is missing, the
  // rest of the input is consumed and returned.
  std::string read_string () {
    const int opening = peek();
    if (opening != '"' && opening != '\'') return "";
    const char quote = static_cast<char>(opening);
    bool escaped = false;
    unsigned int expected_quotes_ahead = 2;
    const std::function<bool(char)> parse_string = [&](char c) -> bool {
      // Both quotes seen: the current character is the first one past the string.
      if (expected_quotes_ahead == 0) return true;
      // An escaped character is taken verbatim, even if it is a backslash.
      if (escaped) { escaped = false; return false; }
      if (c == '\\') { escaped = true; return false; }
      if (c == quote) expected_quotes_ahead--;
      return false;
    };
    return extract_until_or_preserve_state(parse_string);
  }
  //@+node:caminhante.20210809162010.1: *4* Basic statistics
  //@+node:caminhante.20210809162204.1: *5* reached_end
  // True when every character of the input has been consumed.
  bool reached_end () const {
    return cursor >= source.size();
  }
  //@+node:caminhante.20210809162208.1: *5* position
  // 0-based index of the next unread character; equals length() at the end.
  std::size_t position () const {
    return cursor;
  }
  //@+node:caminhante.20210809162214.1: *5* column
  // 1-based column of the next unread character within its line.
  std::size_t column () const {
    return cursor-beginning_last_line+1;
  }
  //@+node:caminhante.20210809162220.1: *5* length
  // Total number of characters in the input.
  std::size_t length () const {
    return source.size();
  }
  //@+node:caminhante.20210809162223.1: *5* line
  // 1-based number of the line the cursor is on.
  std::size_t line () const {
    return current_line;
  }
  //@-others
//@+node:caminhante.20210809162021.1: *3* private
private:
  //@+others
  //@+node:caminhante.20210809162308.1: *4* extract_until_or_preserve_state
  // Measures the run of characters from the cursor up to (not including) the
  // first one for which `predicate` returns true, or up to the end of input.
  // If `acceptable` is empty or approves the run length, the run is consumed
  // (updating line statistics) and returned; otherwise nothing is consumed
  // and "" is returned.
  std::string extract_until_or_preserve_state (
        const std::function<bool(char)> &predicate,
        const std::function<bool(std::size_t)> &acceptable = nullptr) {
    if (reached_end()) return "";
    // Look ahead, then rewind so a rejected run leaves no trace.
    save_reading_state();
    const std::size_t extractlen = skip_until(predicate);
    restore_reading_state();
    if (acceptable && !acceptable(extractlen)) return "";
    std::string extracted = source.substr(cursor, extractlen);
    // Consume through next() so each newline is recorded at its real position.
    for (std::size_t k = 0; k < extractlen; ++k) next();
    return extracted;
  }
  //@+node:caminhante.20210809162315.1: *4* skip_until
  // Moves the cursor forward to the first character satisfying `predicate`
  // (or to the end) and returns how many characters were skipped. Line
  // statistics are not updated. The predicate may carry state, so it is
  // applied exactly once per character, in order.
  std::size_t skip_until (const std::function<bool(char)> &predicate) {
    const std::size_t start_pos = cursor;
    while (cursor < source.size() && !predicate(source[cursor])) ++cursor;
    return cursor-start_pos;
  }
  //@+node:caminhante.20210809162322.1: *4* save_reading_state
  // Remembers the cursor and line statistics for restore_reading_state().
  void save_reading_state () {
    saved_position = cursor;
    saved_last_line = beginning_last_line;
    saved_line = current_line;
  }
  //@+node:caminhante.20210809162329.1: *4* restore_reading_state
  // Rewinds to the state captured by the last save_reading_state().
  void restore_reading_state () {
    cursor = saved_position;
    beginning_last_line = saved_last_line;
    current_line = saved_line;
  }
  //@+node:caminhante.20210809162334.1: *4* stats_for_next_char
  // Updates line statistics for a character that has just been consumed.
  void stats_for_next_char (int c) {
    if (c == '\n') new_line();
  }
  //@+node:caminhante.20210809162341.1: *4* new_line
  // Records the start of a new line. Must be called right after the newline
  // was consumed, i.e. while the cursor is on the first character of the
  // new line.
  void new_line () {
    beginning_last_line = cursor;
    current_line++;
  }
  //@-others
//@-others
};
//@-others
//@-leo
